In [34]:
!pip install pandas
!pip install requests
!pip install bs4
!pip install html5lib
!pip install lxml
!pip install plotly
!pip install numpy
!pip install yfinance
import pandas as pd
import requests
import yfinance as yf
from bs4 import BeautifulSoup
Requirement already satisfied: pandas in c:\users\moses\anaconda3\lib\site-packages (2.3.3) Requirement already satisfied: numpy>=1.26.0 in c:\users\moses\anaconda3\lib\site-packages (from pandas) (2.1.3) Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\moses\anaconda3\lib\site-packages (from pandas) (2.9.0.post0) Requirement already satisfied: pytz>=2020.1 in c:\users\moses\anaconda3\lib\site-packages (from pandas) (2024.1) Requirement already satisfied: tzdata>=2022.7 in c:\users\moses\anaconda3\lib\site-packages (from pandas) (2025.2) Requirement already satisfied: six>=1.5 in c:\users\moses\anaconda3\lib\site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0) Requirement already satisfied: requests in c:\users\moses\anaconda3\lib\site-packages (2.32.3) Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\moses\anaconda3\lib\site-packages (from requests) (3.3.2) Requirement already satisfied: idna<4,>=2.5 in c:\users\moses\anaconda3\lib\site-packages (from requests) (3.7) Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\moses\anaconda3\lib\site-packages (from requests) (2.3.0) Requirement already satisfied: certifi>=2017.4.17 in c:\users\moses\anaconda3\lib\site-packages (from requests) (2025.8.3) Requirement already satisfied: bs4 in c:\users\moses\anaconda3\lib\site-packages (0.0.2) Requirement already satisfied: beautifulsoup4 in c:\users\moses\anaconda3\lib\site-packages (from bs4) (4.14.2) Requirement already satisfied: soupsieve>1.2 in c:\users\moses\anaconda3\lib\site-packages (from beautifulsoup4->bs4) (2.5) Requirement already satisfied: typing-extensions>=4.0.0 in c:\users\moses\anaconda3\lib\site-packages (from beautifulsoup4->bs4) (4.12.2) Requirement already satisfied: html5lib in c:\users\moses\anaconda3\lib\site-packages (1.1) Requirement already satisfied: six>=1.9 in c:\users\moses\anaconda3\lib\site-packages (from html5lib) (1.17.0) Requirement already satisfied: webencodings in c:\users\moses\anaconda3\lib\site-packages (from html5lib) (0.5.1) Requirement already satisfied: lxml in c:\users\moses\anaconda3\lib\site-packages (5.3.0) Requirement already satisfied: plotly in c:\users\moses\anaconda3\lib\site-packages (5.24.1) Requirement already satisfied: tenacity>=6.2.0 in c:\users\moses\anaconda3\lib\site-packages (from plotly) (9.0.0) Requirement already satisfied: packaging in c:\users\moses\anaconda3\lib\site-packages (from plotly) (24.2) Requirement already satisfied: numpy in c:\users\moses\anaconda3\lib\site-packages (2.1.3) Requirement already satisfied: yfinance in c:\users\moses\anaconda3\lib\site-packages (0.2.66) Requirement already satisfied: pandas>=1.3.0 in c:\users\moses\anaconda3\lib\site-packages (from yfinance) (2.3.3) Requirement already satisfied: numpy>=1.16.5 in c:\users\moses\anaconda3\lib\site-packages (from yfinance) (2.1.3) Requirement already satisfied: requests>=2.31 in c:\users\moses\anaconda3\lib\site-packages (from yfinance) (2.32.3) Requirement already satisfied: multitasking>=0.0.7 in c:\users\moses\anaconda3\lib\site-packages (from yfinance) (0.0.12) Requirement already satisfied: platformdirs>=2.0.0 in c:\users\moses\anaconda3\lib\site-packages (from yfinance) (4.3.7) Requirement already satisfied: pytz>=2022.5 in c:\users\moses\anaconda3\lib\site-packages (from yfinance) (2024.1) Requirement already satisfied: frozendict>=2.3.4 in c:\users\moses\anaconda3\lib\site-packages (from yfinance) (2.4.2) Requirement already satisfied: peewee>=3.16.2 in c:\users\moses\anaconda3\lib\site-packages (from yfinance) (3.18.2) Requirement already satisfied: beautifulsoup4>=4.11.1 in c:\users\moses\anaconda3\lib\site-packages (from yfinance) (4.14.2) Requirement already satisfied: curl_cffi>=0.7 in c:\users\moses\anaconda3\lib\site-packages (from yfinance) (0.13.0) Requirement already satisfied: protobuf>=3.19.0 in c:\users\moses\anaconda3\lib\site-packages (from yfinance) (5.29.3) Requirement already satisfied: websockets>=13.0 in c:\users\moses\anaconda3\lib\site-packages (from yfinance) (15.0.1) Requirement already satisfied: soupsieve>1.2 in c:\users\moses\anaconda3\lib\site-packages (from beautifulsoup4>=4.11.1->yfinance) (2.5) Requirement already satisfied: typing-extensions>=4.0.0 in c:\users\moses\anaconda3\lib\site-packages (from beautifulsoup4>=4.11.1->yfinance) (4.12.2) Requirement already satisfied: cffi>=1.12.0 in c:\users\moses\anaconda3\lib\site-packages (from curl_cffi>=0.7->yfinance) (1.17.1) Requirement already satisfied: certifi>=2024.2.2 in c:\users\moses\anaconda3\lib\site-packages (from curl_cffi>=0.7->yfinance) (2025.8.3) Requirement already satisfied: pycparser in c:\users\moses\anaconda3\lib\site-packages (from cffi>=1.12.0->curl_cffi>=0.7->yfinance) (2.21) Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\moses\anaconda3\lib\site-packages (from pandas>=1.3.0->yfinance) (2.9.0.post0) Requirement already satisfied: tzdata>=2022.7 in c:\users\moses\anaconda3\lib\site-packages (from pandas>=1.3.0->yfinance) (2025.2) Requirement already satisfied: six>=1.5 in c:\users\moses\anaconda3\lib\site-packages (from python-dateutil>=2.8.2->pandas>=1.3.0->yfinance) (1.17.0) Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\moses\anaconda3\lib\site-packages (from requests>=2.31->yfinance) (3.3.2) Requirement already satisfied: idna<4,>=2.5 in c:\users\moses\anaconda3\lib\site-packages (from requests>=2.31->yfinance) (3.7) Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\moses\anaconda3\lib\site-packages (from requests>=2.31->yfinance) (2.3.0)
In [4]:
!mamba install bs4==4.10.0 -y
!pip install lxml==4.6.4
!mamba install html5lib==1.1 -y
!pip install pandas
!pip install --upgrade beautifulsoup4
!pip install --upgrade pandas
'mamba' is not recognized as an internal or external command, operable program or batch file.
Collecting lxml==4.6.4 Using cached lxml-4.6.4.tar.gz (3.2 MB) Preparing metadata (setup.py): started Preparing metadata (setup.py): finished with status 'done' Building wheels for collected packages: lxml Building wheel for lxml (setup.py): started Building wheel for lxml (setup.py): finished with status 'error' Running setup.py clean for lxml Failed to build lxml
DEPRECATION: Building 'lxml' using the legacy setup.py bdist_wheel mechanism, which will be removed in a future version. pip 25.3 will enforce this behaviour change. A possible replacement is to use the standardized build interface by setting the `--use-pep517` option, (possibly combined with `--no-build-isolation`), or adding a `pyproject.toml` file to the source tree of 'lxml'. Discussion can be found at https://github.com/pypa/pip/issues/6334
error: subprocess-exited-with-error
python setup.py bdist_wheel did not run successfully.
exit code: 1
[76 lines of output]
Building lxml version 4.6.4.
C:\Users\moses\AppData\Local\Temp\pip-install-_59y0enb\lxml_52ccef85c02a4aaea0feea5eef4aff49\setup.py:67: DeprecationWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html
import pkg_resources
Building without Cython.
Building against pre-built libxml2 andl libxslt libraries
running bdist_wheel
running build
running build_py
creating build
creating build\lib.win-amd64-cpython-313
creating build\lib.win-amd64-cpython-313\lxml
copying src\lxml\builder.py -> build\lib.win-amd64-cpython-313\lxml
copying src\lxml\cssselect.py -> build\lib.win-amd64-cpython-313\lxml
copying src\lxml\doctestcompare.py -> build\lib.win-amd64-cpython-313\lxml
copying src\lxml\ElementInclude.py -> build\lib.win-amd64-cpython-313\lxml
copying src\lxml\pyclasslookup.py -> build\lib.win-amd64-cpython-313\lxml
copying src\lxml\sax.py -> build\lib.win-amd64-cpython-313\lxml
copying src\lxml\usedoctest.py -> build\lib.win-amd64-cpython-313\lxml
copying src\lxml\_elementpath.py -> build\lib.win-amd64-cpython-313\lxml
copying src\lxml\__init__.py -> build\lib.win-amd64-cpython-313\lxml
creating build\lib.win-amd64-cpython-313\lxml\includes
copying src\lxml\includes\__init__.py -> build\lib.win-amd64-cpython-313\lxml\includes
creating build\lib.win-amd64-cpython-313\lxml\html
copying src\lxml\html\builder.py -> build\lib.win-amd64-cpython-313\lxml\html
copying src\lxml\html\clean.py -> build\lib.win-amd64-cpython-313\lxml\html
copying src\lxml\html\defs.py -> build\lib.win-amd64-cpython-313\lxml\html
copying src\lxml\html\diff.py -> build\lib.win-amd64-cpython-313\lxml\html
copying src\lxml\html\ElementSoup.py -> build\lib.win-amd64-cpython-313\lxml\html
copying src\lxml\html\formfill.py -> build\lib.win-amd64-cpython-313\lxml\html
copying src\lxml\html\html5parser.py -> build\lib.win-amd64-cpython-313\lxml\html
copying src\lxml\html\soupparser.py -> build\lib.win-amd64-cpython-313\lxml\html
copying src\lxml\html\usedoctest.py -> build\lib.win-amd64-cpython-313\lxml\html
copying src\lxml\html\_diffcommand.py -> build\lib.win-amd64-cpython-313\lxml\html
copying src\lxml\html\_html5builder.py -> build\lib.win-amd64-cpython-313\lxml\html
copying src\lxml\html\_setmixin.py -> build\lib.win-amd64-cpython-313\lxml\html
copying src\lxml\html\__init__.py -> build\lib.win-amd64-cpython-313\lxml\html
creating build\lib.win-amd64-cpython-313\lxml\isoschematron
copying src\lxml\isoschematron\__init__.py -> build\lib.win-amd64-cpython-313\lxml\isoschematron
copying src\lxml\etree.h -> build\lib.win-amd64-cpython-313\lxml
copying src\lxml\etree_api.h -> build\lib.win-amd64-cpython-313\lxml
copying src\lxml\lxml.etree.h -> build\lib.win-amd64-cpython-313\lxml
copying src\lxml\lxml.etree_api.h -> build\lib.win-amd64-cpython-313\lxml
copying src\lxml\includes\c14n.pxd -> build\lib.win-amd64-cpython-313\lxml\includes
copying src\lxml\includes\config.pxd -> build\lib.win-amd64-cpython-313\lxml\includes
copying src\lxml\includes\dtdvalid.pxd -> build\lib.win-amd64-cpython-313\lxml\includes
copying src\lxml\includes\etreepublic.pxd -> build\lib.win-amd64-cpython-313\lxml\includes
copying src\lxml\includes\htmlparser.pxd -> build\lib.win-amd64-cpython-313\lxml\includes
copying src\lxml\includes\relaxng.pxd -> build\lib.win-amd64-cpython-313\lxml\includes
copying src\lxml\includes\schematron.pxd -> build\lib.win-amd64-cpython-313\lxml\includes
copying src\lxml\includes\tree.pxd -> build\lib.win-amd64-cpython-313\lxml\includes
copying src\lxml\includes\uri.pxd -> build\lib.win-amd64-cpython-313\lxml\includes
copying src\lxml\includes\xinclude.pxd -> build\lib.win-amd64-cpython-313\lxml\includes
copying src\lxml\includes\xmlerror.pxd -> build\lib.win-amd64-cpython-313\lxml\includes
copying src\lxml\includes\xmlparser.pxd -> build\lib.win-amd64-cpython-313\lxml\includes
copying src\lxml\includes\xmlschema.pxd -> build\lib.win-amd64-cpython-313\lxml\includes
copying src\lxml\includes\xpath.pxd -> build\lib.win-amd64-cpython-313\lxml\includes
copying src\lxml\includes\xslt.pxd -> build\lib.win-amd64-cpython-313\lxml\includes
copying src\lxml\includes\__init__.pxd -> build\lib.win-amd64-cpython-313\lxml\includes
copying src\lxml\includes\etree_defs.h -> build\lib.win-amd64-cpython-313\lxml\includes
copying src\lxml\includes\lxml-version.h -> build\lib.win-amd64-cpython-313\lxml\includes
creating build\lib.win-amd64-cpython-313\lxml\isoschematron\resources
creating build\lib.win-amd64-cpython-313\lxml\isoschematron\resources\rng
copying src\lxml\isoschematron\resources\rng\iso-schematron.rng -> build\lib.win-amd64-cpython-313\lxml\isoschematron\resources\rng
creating build\lib.win-amd64-cpython-313\lxml\isoschematron\resources\xsl
copying src\lxml\isoschematron\resources\xsl\RNG2Schtrn.xsl -> build\lib.win-amd64-cpython-313\lxml\isoschematron\resources\xsl
copying src\lxml\isoschematron\resources\xsl\XSD2Schtrn.xsl -> build\lib.win-amd64-cpython-313\lxml\isoschematron\resources\xsl
creating build\lib.win-amd64-cpython-313\lxml\isoschematron\resources\xsl\iso-schematron-xslt1
copying src\lxml\isoschematron\resources\xsl\iso-schematron-xslt1\iso_abstract_expand.xsl -> build\lib.win-amd64-cpython-313\lxml\isoschematron\resources\xsl\iso-schematron-xslt1
copying src\lxml\isoschematron\resources\xsl\iso-schematron-xslt1\iso_dsdl_include.xsl -> build\lib.win-amd64-cpython-313\lxml\isoschematron\resources\xsl\iso-schematron-xslt1
copying src\lxml\isoschematron\resources\xsl\iso-schematron-xslt1\iso_schematron_message.xsl -> build\lib.win-amd64-cpython-313\lxml\isoschematron\resources\xsl\iso-schematron-xslt1
copying src\lxml\isoschematron\resources\xsl\iso-schematron-xslt1\iso_schematron_skeleton_for_xslt1.xsl -> build\lib.win-amd64-cpython-313\lxml\isoschematron\resources\xsl\iso-schematron-xslt1
copying src\lxml\isoschematron\resources\xsl\iso-schematron-xslt1\iso_svrl_for_xslt1.xsl -> build\lib.win-amd64-cpython-313\lxml\isoschematron\resources\xsl\iso-schematron-xslt1
copying src\lxml\isoschematron\resources\xsl\iso-schematron-xslt1\readme.txt -> build\lib.win-amd64-cpython-313\lxml\isoschematron\resources\xsl\iso-schematron-xslt1
running build_ext
building 'lxml.etree' extension
error: Microsoft Visual C++ 14.0 or greater is required. Get it with "Microsoft C++ Build Tools": https://visualstudio.microsoft.com/visual-cpp-build-tools/
[end of output]
note: This error originates from a subprocess, and is likely not a problem with pip.
ERROR: Failed building wheel for lxml
ERROR: Failed to build installable wheels for some pyproject.toml based projects (lxml)
'mamba' is not recognized as an internal or external command,
operable program or batch file.
Requirement already satisfied: pandas in c:\users\moses\anaconda3\lib\site-packages (2.2.3)
Requirement already satisfied: numpy>=1.26.0 in c:\users\moses\anaconda3\lib\site-packages (from pandas) (2.1.3)
Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\moses\anaconda3\lib\site-packages (from pandas) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in c:\users\moses\anaconda3\lib\site-packages (from pandas) (2024.1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\moses\anaconda3\lib\site-packages (from pandas) (2025.2)
Requirement already satisfied: six>=1.5 in c:\users\moses\anaconda3\lib\site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)
Requirement already satisfied: beautifulsoup4 in c:\users\moses\anaconda3\lib\site-packages (4.14.2)
Requirement already satisfied: soupsieve>1.2 in c:\users\moses\anaconda3\lib\site-packages (from beautifulsoup4) (2.5)
Requirement already satisfied: typing-extensions>=4.0.0 in c:\users\moses\anaconda3\lib\site-packages (from beautifulsoup4) (4.12.2)
Requirement already satisfied: pandas in c:\users\moses\anaconda3\lib\site-packages (2.2.3)
Collecting pandas
Downloading pandas-2.3.3-cp313-cp313-win_amd64.whl.metadata (19 kB)
Requirement already satisfied: numpy>=1.26.0 in c:\users\moses\anaconda3\lib\site-packages (from pandas) (2.1.3)
Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\moses\anaconda3\lib\site-packages (from pandas) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in c:\users\moses\anaconda3\lib\site-packages (from pandas) (2024.1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\moses\anaconda3\lib\site-packages (from pandas) (2025.2)
Requirement already satisfied: six>=1.5 in c:\users\moses\anaconda3\lib\site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)
Downloading pandas-2.3.3-cp313-cp313-win_amd64.whl (11.0 MB)
---------------------------------------- 0.0/11.0 MB ? eta -:--:--
--- ------------------------------------ 1.0/11.0 MB 6.6 MB/s eta 0:00:02
--------- ------------------------------ 2.6/11.0 MB 7.1 MB/s eta 0:00:02
------------- -------------------------- 3.7/11.0 MB 6.2 MB/s eta 0:00:02
------------------- -------------------- 5.2/11.0 MB 7.0 MB/s eta 0:00:01
------------------------ --------------- 6.8/11.0 MB 6.9 MB/s eta 0:00:01
----------------------------- ---------- 8.1/11.0 MB 7.0 MB/s eta 0:00:01
------------------------------- -------- 8.7/11.0 MB 6.2 MB/s eta 0:00:01
---------------------------------- ----- 9.4/11.0 MB 5.8 MB/s eta 0:00:01
--------------------------------------- 10.7/11.0 MB 5.9 MB/s eta 0:00:01
---------------------------------------- 11.0/11.0 MB 5.8 MB/s eta 0:00:00
Installing collected packages: pandas
Attempting uninstall: pandas
Found existing installation: pandas 2.2.3
Uninstalling pandas-2.2.3:
Successfully uninstalled pandas-2.2.3
Successfully installed pandas-2.3.3
In [16]:
#Question 1 - Extracting Tesla Stock Data Using yfinance - 2 Points
Tesla = yf.Ticker("TSLA")
tesla_data = Tesla.history(period="max")
tesla_data
Out[16]:
| Open | High | Low | Close | Volume | Dividends | Stock Splits | |
|---|---|---|---|---|---|---|---|
| Date | |||||||
| 2010-06-29 00:00:00-04:00 | 1.266667 | 1.666667 | 1.169333 | 1.592667 | 281494500 | 0.0 | 0.0 |
| 2010-06-30 00:00:00-04:00 | 1.719333 | 2.028000 | 1.553333 | 1.588667 | 257806500 | 0.0 | 0.0 |
| 2010-07-01 00:00:00-04:00 | 1.666667 | 1.728000 | 1.351333 | 1.464000 | 123282000 | 0.0 | 0.0 |
| 2010-07-02 00:00:00-04:00 | 1.533333 | 1.540000 | 1.247333 | 1.280000 | 77097000 | 0.0 | 0.0 |
| 2010-07-06 00:00:00-04:00 | 1.333333 | 1.333333 | 1.055333 | 1.074000 | 103003500 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 2025-09-29 00:00:00-04:00 | 444.350006 | 450.980011 | 439.500000 | 443.209991 | 79491500 | 0.0 | 0.0 |
| 2025-09-30 00:00:00-04:00 | 441.519989 | 445.000000 | 433.119995 | 444.720001 | 74358000 | 0.0 | 0.0 |
| 2025-10-01 00:00:00-04:00 | 443.799988 | 462.290009 | 440.750000 | 459.459991 | 98122300 | 0.0 | 0.0 |
| 2025-10-02 00:00:00-04:00 | 470.540009 | 470.750000 | 435.570007 | 436.000000 | 137009000 | 0.0 | 0.0 |
| 2025-10-03 00:00:00-04:00 | 443.290009 | 446.769989 | 416.579987 | 429.829987 | 132772600 | 0.0 | 0.0 |
3841 rows × 7 columns
In [19]:
#Question 2 - Extracting Tesla Revenue Data Using Webscraping - 1 Points
import pandas as pd
import warnings
warnings.simplefilter("ignore")
from bs4 import BeautifulSoup
import requests
# Get the webpage content
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/91.0.4472.124 Safari/537.36"
}
url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/revenue.htm"
data = requests.get(url, headers=headers).text
soup = BeautifulSoup(data, "html.parser")
import re
page_text = soup.get_text("\n", strip=True)
match = re.search(r"Tesla\s+Quarterly\s+Revenue", page_text, re.I)
if not match:
raise RuntimeError("Tesla Quarterly Revenue section not found.")
after = page_text[match.end():]
pattern = re.compile(r"(\d{4}-\d{2}-\d{2})\s+\$([\d,]+)")
rows = pattern.findall(after)
tesla_revenue = pd.DataFrame(clean_rows)
tesla_revenue["Revenue"] = tesla_revenue["Revenue"].astype(float)
print(tesla_revenue.tail())
Date Revenue 48 2010-09-30 31.0 49 2010-06-30 28.0 50 2010-03-31 21.0 51 2009-09-30 46.0 52 2009-06-30 27.0
In [35]:
#Question 3 - Extracting GameStop Stock Data Using yfinance - 2 Points
Game = yf.Ticker("GME")
game_data = Game.history(period="max")
game_data
Out[35]:
| Open | High | Low | Close | Volume | Dividends | Stock Splits | |
|---|---|---|---|---|---|---|---|
| Date | |||||||
| 2002-02-13 00:00:00-05:00 | 1.620129 | 1.693350 | 1.603296 | 1.691667 | 76216000 | 0.0 | 0.0 |
| 2002-02-14 00:00:00-05:00 | 1.712707 | 1.716074 | 1.670626 | 1.683250 | 11021600 | 0.0 | 0.0 |
| 2002-02-15 00:00:00-05:00 | 1.683250 | 1.687458 | 1.658001 | 1.674834 | 8389600 | 0.0 | 0.0 |
| 2002-02-19 00:00:00-05:00 | 1.666418 | 1.666418 | 1.578047 | 1.607504 | 7410400 | 0.0 | 0.0 |
| 2002-02-20 00:00:00-05:00 | 1.615921 | 1.662210 | 1.603296 | 1.662210 | 6892800 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 2025-09-29 00:00:00-04:00 | 27.200001 | 27.209999 | 26.790001 | 27.209999 | 7979000 | 0.0 | 0.0 |
| 2025-09-30 00:00:00-04:00 | 27.209999 | 27.340000 | 26.799999 | 27.280001 | 5392800 | 0.0 | 0.0 |
| 2025-10-01 00:00:00-04:00 | 27.250000 | 27.790001 | 27.180000 | 27.690001 | 8187100 | 0.0 | 0.0 |
| 2025-10-02 00:00:00-04:00 | 28.000000 | 28.100000 | 26.719999 | 27.219999 | 14526000 | 0.0 | 0.0 |
| 2025-10-03 00:00:00-04:00 | 26.360001 | 26.990000 | 25.160000 | 25.379999 | 11565300 | 0.0 | 0.0 |
5949 rows × 7 columns
In [27]:
#Question 4 - Extracting GameStop Revenue Data Using Webscraping - 1 Points
import pandas as pd
import warnings, re
warnings.simplefilter("ignore")
from bs4 import BeautifulSoup
import requests
# Get the webpage content
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/91.0.4472.124 Safari/537.36"
}
url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/stock.html"
html = requests.get(url, headers=headers).text
soup = BeautifulSoup(html, "html.parser")
text = soup.get_text("\n", strip=True)
start_pat = re.compile(r"GameStop\s+Quarterly\s+Revenue", re.I)
start_m = start_pat.search(text)
if not start_m:
raise RuntimeError("GameStop Quarterly Revenue section not found in page text.")
start_idx = start_m.end()
end_idx = text.find("Sector Industry", start_idx)
if end_idx == -1:
for stop_word in ["Stock Name", "We Need Your Support", "GameStop Annual Revenue"]:
end_idx = text.find(stop_word, start_idx)
if end_idx != -1:
break
if end_idx == -1:
end_idx = len(text)
block = text[start_idx:end_idx]
# Find all lines like: YYYY-MM-DD $#,###
pairs = re.findall(r"(\d{4}-\d{2}-\d{2})\s+\$?\s*([\d,]+)", block)
game_revenue = pd.DataFrame(pairs, columns=["Date", "Revenue"])
game_revenue["Revenue"] = (
game_revenue["Revenue"]
.str.replace(",", "", regex=False)
.astype(float)
)
print(game_revenue.tail())
Date Revenue 57 2006-01-31 1667.0 58 2005-10-31 534.0 59 2005-07-31 416.0 60 2005-04-30 475.0 61 2005-01-31 709.0
In [29]:
#Question 6 - GameStop Stock and Revenue Dashboard- 2 Points
import plotly.express as px
import pandas as pd
import requests
import re, requests, pandas as pd
import matplotlib.pyplot as plt
url = "https://stockanalysis.com/stocks/gme/revenue/"
game_revenue = pd.DataFrame(columns=["Date", "Revenue"])
tables = pd.read_html(
"https://stockanalysis.com/stocks/gme/revenue/",
storage_options={"User-Agent": "Mozilla/5.0"}
)
revenue_table = tables[0]
for i, row in revenue_table.iterrows():
date = row[0]
revenue = row[1]
game_revenue.loc[i] = [date, revenue]
print(game_revenue)
df = tables[0].iloc[:, :2].copy()
df.columns = ["Date", "Revenue"]
display(df)
fig = px.line(df, x="Date", y="Revenue", title= "GameStop Quarterly Revenue")
fig.update_traces(mode="lines+markers")
fig.update_layout(xaxis_title="Date", yaxis_title="Revenue", template="plotly_white")
fig.show()
Date Revenue 0 Feb 1, 2025 3.82B 1 Feb 3, 2024 5.27B 2 Jan 28, 2023 5.93B 3 Jan 29, 2022 6.01B 4 Jan 30, 2021 5.09B 5 Feb 1, 2020 Upgrade Pro 6 Feb 2, 2019 Upgrade Pro 7 Feb 3, 2018 Upgrade Pro 8 Jan 28, 2017 Upgrade Pro 9 Jan 30, 2016 Upgrade Pro
| Date | Revenue | |
|---|---|---|
| 0 | Feb 1, 2025 | 3.82B |
| 1 | Feb 3, 2024 | 5.27B |
| 2 | Jan 28, 2023 | 5.93B |
| 3 | Jan 29, 2022 | 6.01B |
| 4 | Jan 30, 2021 | 5.09B |
| 5 | Feb 1, 2020 | Upgrade Pro |
| 6 | Feb 2, 2019 | Upgrade Pro |
| 7 | Feb 3, 2018 | Upgrade Pro |
| 8 | Jan 28, 2017 | Upgrade Pro |
| 9 | Jan 30, 2016 | Upgrade Pro |
[*********************100%***********************] 1 of 1 completed
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[29], line 40 37 revenue = df.copy() 39 # Make the dashboard ---> 40 make_graph(stock, revenue, "GameStop") NameError: name 'make_graph' is not defined
In [1]:
#Question 5 - Tesla Stock and Revenue Dashboard - 2 Points
import plotly.express as px
import pandas as pd
import requests
import re, requests, pandas as pd
import matplotlib.pyplot as plt
url = "https://stockanalysis.com/stocks/tsla/revenue/"
game_revenue = pd.DataFrame(columns=["Date", "Revenue"])
tables = pd.read_html(
"https://stockanalysis.com/stocks/tsla/revenue/",
storage_options={"User-Agent": "Mozilla/5.0"}
)
revenue_table = tables[0]
for i, row in revenue_table.iterrows():
date = row[0]
revenue = row[1]
game_revenue.loc[i] = [date, revenue]
print(game_revenue)
df = tables[0].iloc[:, :2].copy()
df.columns = ["Date", "Revenue"]
display(df)
fig = px.line(df, x="Date", y="Revenue", title= "Tesla Quarterly Revenue")
fig.update_traces(mode="lines+markers")
fig.update_layout(xaxis_title="Date", yaxis_title="Revenue", template="plotly_white")
fig.show()
Date Revenue 0 Dec 31, 2024 97.69B 1 Dec 31, 2023 96.77B 2 Dec 31, 2022 81.46B 3 Dec 31, 2021 53.82B 4 Dec 31, 2020 31.54B 5 Dec 31, 2019 Upgrade Pro 6 Dec 31, 2018 Upgrade Pro 7 Dec 31, 2017 Upgrade Pro 8 Dec 31, 2016 Upgrade Pro 9 Dec 31, 2015 Upgrade Pro
C:\Users\moses\AppData\Local\Temp\ipykernel_34724\2180264102.py:16: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` date = row[0] C:\Users\moses\AppData\Local\Temp\ipykernel_34724\2180264102.py:17: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` revenue = row[1] C:\Users\moses\AppData\Local\Temp\ipykernel_34724\2180264102.py:16: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` date = row[0] C:\Users\moses\AppData\Local\Temp\ipykernel_34724\2180264102.py:17: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` revenue = row[1] C:\Users\moses\AppData\Local\Temp\ipykernel_34724\2180264102.py:16: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` date = row[0] C:\Users\moses\AppData\Local\Temp\ipykernel_34724\2180264102.py:17: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` revenue = row[1] C:\Users\moses\AppData\Local\Temp\ipykernel_34724\2180264102.py:16: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` date = row[0] C:\Users\moses\AppData\Local\Temp\ipykernel_34724\2180264102.py:17: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` revenue = row[1] C:\Users\moses\AppData\Local\Temp\ipykernel_34724\2180264102.py:16: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` date = row[0] C:\Users\moses\AppData\Local\Temp\ipykernel_34724\2180264102.py:17: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` revenue = row[1] C:\Users\moses\AppData\Local\Temp\ipykernel_34724\2180264102.py:16: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` date = row[0] C:\Users\moses\AppData\Local\Temp\ipykernel_34724\2180264102.py:17: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` revenue = row[1] C:\Users\moses\AppData\Local\Temp\ipykernel_34724\2180264102.py:16: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` date = row[0] C:\Users\moses\AppData\Local\Temp\ipykernel_34724\2180264102.py:17: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` revenue = row[1] C:\Users\moses\AppData\Local\Temp\ipykernel_34724\2180264102.py:16: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` date = row[0] C:\Users\moses\AppData\Local\Temp\ipykernel_34724\2180264102.py:17: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` revenue = row[1] C:\Users\moses\AppData\Local\Temp\ipykernel_34724\2180264102.py:16: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` date = row[0] C:\Users\moses\AppData\Local\Temp\ipykernel_34724\2180264102.py:17: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` revenue = row[1] C:\Users\moses\AppData\Local\Temp\ipykernel_34724\2180264102.py:16: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` date = row[0] C:\Users\moses\AppData\Local\Temp\ipykernel_34724\2180264102.py:17: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]` revenue = row[1]
| Date | Revenue | |
|---|---|---|
| 0 | Dec 31, 2024 | 97.69B |
| 1 | Dec 31, 2023 | 96.77B |
| 2 | Dec 31, 2022 | 81.46B |
| 3 | Dec 31, 2021 | 53.82B |
| 4 | Dec 31, 2020 | 31.54B |
| 5 | Dec 31, 2019 | Upgrade Pro |
| 6 | Dec 31, 2018 | Upgrade Pro |
| 7 | Dec 31, 2017 | Upgrade Pro |
| 8 | Dec 31, 2016 | Upgrade Pro |
| 9 | Dec 31, 2015 | Upgrade Pro |
In [ ]: